Expert(Sto) Return Avg: 5344.21, std: 84.45
array([5468.355, 5355.007, 5318.677, 5209.376, 5386.716, 5345.665,
       5374.757, 5456.733, 5392.301, 5393.831, 5434.147, 5408.793,
       5196.358, 5284.467, 5458.316, 5404.685, 5399.014, 5155.62 ,
       5387.207, 5305.797, 5358.304, 5413.285, 5440.836, 5368.105,
       5348.113, 5374.864, 5368.985, 5447.216, 5153.679, 5471.13 ,
       5374.028, 5401.202, 5397.05 , 5292.218, 5350.855, 5335.233,
       5345.407, 5257.378, 5286.984, 5326.554, 5309.479, 5422.897,
       5448.194, 5223.849, 5425.741, 5349.688, 5370.058, 5343.503,
       5418.851, 5407.414, 5372.199, 5289.034, 5409.694, 5373.184,
       5179.679, 5256.259, 5181.917, 5377.656, 5224.78 , 5334.359,
       5144.533, 5177.68 , 5394.084, 5347.327])
Expert(Det) Return Avg: 5469.11, std: 18.04
array([5456.919, 5495.81 , 5458.904, 5474.388, 5471.915, 5467.229,
       5478.105, 5478.286, 5464.968, 5470.558, 5490.823, 5474.357,
       5478.27 , 5467.386, 5439.668, 5465.793, 5454.164, 5481.049,
       5461.083, 5441.884, 5485.509, 5487.306, 5447.391, 5452.376,
       5459.001, 5433.509, 5471.319, 5480.48 , 5432.559, 5493.299,
       5466.068, 5495.507, 5437.043, 5490.142, 5489.595, 5487.851,
       5456.102, 5481.359, 5454.283, 5468.505, 5448.61 , 5490.744,
       5458.486, 5452.83 , 5502.641, 5421.885, 5463.026, 5457.453,
       5473.163, 5444.652, 5456.257, 5458.097, 5465.05 , 5457.328,
       5492.209, 5477.239, 5493.648, 5479.241, 5488.918, 5491.014,
       5460.671, 5485.629, 5486.484, 5477.05 ])
ordereddict([('seed', 1), ('cuda', -1), ('env', ordereddict([('env_name', 'Walker2dFH-v0'), ('T', 1000)])), ('sac', ordereddict([('epochs', 3000), ('log_step_interval', 5000), ('update_every', 50), ('update_num', 1), ('random_explore_episodes', 10), ('batch_size', 100), ('lr', 0.001), ('alpha', 0.2), ('automatic_alpha_tuning', False), ('buffer_size', 1000000), ('num_test_episodes', 10)])), ('expert', ordereddict([('samples_episode', 64)]))])